############################################################################
############################################################################
##Replication: Instability and the Incentives for Corruption
##By: Campante, Chor, and Do
##Replicated by: Chiara Superti and Jennifer Pan
############################################################################
############################################################################


############################################################################
##SETUP: Load needed packages

library(foreign)
library(Zelig)
library(MASS)
library(xtable)
library(car)
library(pls)


############################################################################
##REPLICATION Table 1A: Corruption (KKM) and Average Executive Tenure
##Cross sectional regression w/ Turning Point Monte Carlo Simulation
##Stability proxy: log executive tenure 20 years window


###############Load clean cross sectional data
campante<-read.dta("crosssection_Dec07.dta")
head(campante)
attach(campante)

##Subset the dataset for countries independent before 1982
campante82<-campante[indepdate<1982,]
length(campante82$indepdate)
dim(campante82)
attach(campante82)

##Drop Switzerland: weird outlier since its system forces high turnover
##in the executive branch but this is not a sign of instability
sort(isocode)
campante82.No.CHE<-subset(campante82,isocode!="CHE")
campante82.No.CHE$isocode
detach(campante82)
attach(campante82.No.CHE)

###############Table 1A (1) Min. spec (KKM)
##Obtain coefficients
reg.KKM1 <- lm(kkm0205~lavgten_exec8201+lavgten_exec8201sq)
reg.KKM1.s <- summary(reg.KKM1) 
reg.KKM1.s

##Calculate huber-white robust standard error (using hc1 correction)
U <- residuals(reg.KKM1)
X <- model.matrix.mvr(reg.KKM1)
Xfake <- cbind(1,X)
a <- solve(t(Xfake)%*%Xfake)%*%t(Xfake)%*%diag((U*sqrt((nrow(Xfake))/(nrow(Xfake)-(ncol(Xfake)))))^2)%*%Xfake%*%solve(t(Xfake)%*%Xfake)
sqrt(diag(a))

##Turning Point-Monte Carlo Simulation
#since now dep variable is logged, have to take the exp of it
#probability of turning point in the window of years selected
x <- reg.KKM1
s <- reg.KKM1.s
matrixBeta <- as.matrix(t(s$coeff[,1]))
X <- model.matrix.mvr(x)
U <- residuals(x)
matrixVar <- inv(t(X)%*%X)%*%t(X)%*%diag((U*sqrt((nrow(X))/(nrow(X)-(ncol(X)))))^2)%*%X%*%inv(t(X)%*%X)
matrixVar <- as.matrix(matrixVar)
matrixsubBeta <- as.vector(matrixBeta[,2:ncol(matrixBeta)])
sims <- mvrnorm(n=1000, matrixsubBeta, matrixVar)
#dim(matrixVar)
#length(matrixsubBeta)
sims <- as.matrix(sims)
TP <- exp(-((sims[,1])/(2*(sims[,2]))))
#a <- summary(sims[,1])
#b <- summary(sims[,2])
c <- summary(TP)
#d <- quantile(sims[,1],.10)
#e <- quantile(sims[,1],.90)
#f <- quantile(sims[,2],.10)
#g <- quantile(sims[,2],.90)
#h <- quantile(TP,.10)
#i <- quantile(TP,.90)
coverage <- mean(TP>0 & TP<20)
#output <-c(coverage,a,b,c)
output.s <- c(c[3], coverage)
output.s


###############Table 1A (2) (KKM)
##Obtain coefficients
reg.KKM2 <- lm(kkm0205~lavgten_exec8201+lavgten_exec8201sq+lwdigdppc8201+lwdigdppc8201sq+reg_eap+reg_eca+reg_mena+
  reg_we+reg_na+reg_ssa+reg_lac+reg_sa)
reg.KKM2.s <- summary(reg.KKM2)
reg.KKM2.s

##Calculate huber-white robust standard error (using hc1 correction)
U <- residuals(reg.KKM2)
X <- model.matrix.mvr(reg.KKM2)#need to be modified since it has problems of collinearity
X <- cbind(1,X)
Xfake <- X[,-12]
a <- solve(t(Xfake)%*%Xfake)%*%t(Xfake)%*%diag((U*sqrt((nrow(Xfake))/(nrow(Xfake)-(ncol(Xfake)))))^2)%*%Xfake%*%solve(t(Xfake)%*%Xfake)
sqrt(diag(a))

##Turning Point-Monte Carlo Simulation
x <- reg.KKM2
s <- reg.KKM2.s
matrixBeta <- as.matrix(t(s$coeff[,1]))
X <- model.matrix.mvr(x)
X <- X[,-12]
U <- residuals(x)
matrixVar <- inv(t(X)%*%X)%*%t(X)%*%diag((U*sqrt((nrow(X))/(nrow(X)-(ncol(X)))))^2)%*%X%*%inv(t(X)%*%X)
matrixVar <- as.matrix(matrixVar)
matrixsubBeta <- as.vector(matrixBeta[,2:ncol(matrixBeta)])
sims <- mvrnorm(n=1000, matrixsubBeta, matrixVar)
sims <- as.matrix(sims)
TP <- exp(-((sims[,1])/(2*(sims[,2]))))
c <- summary(TP)
coverage <- mean(TP>0 & TP<20)
output.s <- c(coverage, c[3])
output.s

dim(model.matrix.mvr(reg.KKM2))[1]  #number of observations


###############Table 1A (5) Full. spec (KKM)
##Obtain coefficients
reg.KKM5 <- lm(kkm0205~lavgten_exec8201+lavgten_exec8201sq+lwdigdppc8201+lwdigdppc8201sq+elf_eth+democ8201+imports8201+
  fuel8201+ores8201+pluralty8201+magn8201+pres8201+leg_british+leg_french+
  + leg_scandinavian+leg_german+leg_socialist+reg_eap+reg_eca+reg_mena+
  reg_we + reg_na+reg_ssa+reg_lac+reg_sa)
reg.KKM5.s <- summary(reg.KKM5)
reg.KKM5.s

##Calculate huber-white robust standard error (using hc1 correction)
U <- residuals(reg.KKM5)
X <- model.matrix.mvr(reg.KKM5)
X <- cbind(1,X)
Xfake <- X[,-26]
Xfake <- Xfake[,-18]
a <- solve(t(Xfake)%*%Xfake)%*%t(Xfake)%*%diag((U*sqrt((nrow(Xfake))/(nrow(Xfake)-(ncol(Xfake)))))^2)%*%Xfake%*%solve(t(Xfake)%*%Xfake)
sqrt(diag(a))

##Turning Point-Monte Carlo Simulation
x <- reg.KKM5
s <- reg.KKM5.s
matrixBeta <- as.matrix(t(s$coeff[,1]))
X <- model.matrix.mvr(x)
X <- X[,-25]
X <- X[,-17]
U <- residuals(x)
matrixVar <- inv(t(X)%*%X)%*%t(X)%*%diag((U*sqrt((nrow(X))/(nrow(X)-(ncol(X)))))^2)%*%X%*%inv(t(X)%*%X)
matrixVar <- as.matrix(matrixVar)
matrixsubBeta <- as.vector(matrixBeta[,2:ncol(matrixBeta)])
sims <- mvrnorm(n=1000, matrixsubBeta, matrixVar)
sims <- as.matrix(sims)
TP <- exp(-((sims[,1])/(2*(sims[,2]))))
c <- summary(TP)
coverage <- mean(TP>0 & TP<20)
output.s <- c(coverage, c[3])
output.s

dim(model.matrix.mvr(reg.KKM5))[1]  #number of observations


#############################
#############################


###############Table 1A (6) Cook's < 4/n (KKM)
##Double check for outliers influencing the analysis
outlier.test(reg.KKM5)
cookdist <- cookd(reg.KKM5)
good.isocode <- isocode[kkm0205!="NA"]
outliers <- which(cookdist>4/105)
outliers
names.out <- c(isocode[22],isocode[24],isocode[25],isocode[55],isocode[106],isocode[112],isocode[123],isocode[145])
plot(reg.KKM5,4)

##Remove outliers
campante82.Cook <- campante82.No.CHE[isocode!="BWA"& isocode!="CAN"& isocode!="CHL"& isocode!= "GNQ"& isocode!= "NZL" & isocode!="PNG"& isocode!= "SGP" & isocode!="USA",]
campante82.Cook$isocode
detach(campante82.No.CHE)
attach(campante82.Cook)

##Obtain coefficients
reg.KKM6 <- lm(kkm0205~lavgten_exec8201+lavgten_exec8201sq+lwdigdppc8201+lwdigdppc8201sq+elf_eth+democ8201+imports8201+
  fuel8201+ores8201+pluralty8201+magn8201+pres8201+leg_british+leg_french+
  +leg_scandinavian+leg_german+leg_socialist+reg_eap+reg_eca+reg_mena+
  reg_we+reg_ssa+reg_lac+reg_sa+reg_eca+reg_na)
reg.KKM6.s <- summary(reg.KKM6)
reg.KKM6.s

##Calculate huber-white robust standard error (using hc1 correction)
U <- residuals(reg.KKM6)
X <- model.matrix.mvr(reg.KKM6)
X <- cbind(1,X)
Xfake <- X[,-26]
Xfake <- Xfake[,-25]
Xfake <- Xfake[,-18]
a <- solve(t(Xfake)%*%Xfake)%*%t(Xfake)%*%diag((U*sqrt((nrow(Xfake))/(nrow(Xfake)-(ncol(Xfake)))))^2)%*%Xfake%*%solve(t(Xfake)%*%Xfake)
sqrt(diag(a))

##Turning Point-Monte Carlo Simulation
x <- reg.KKM6
s <- reg.KKM6.s
matrixBeta <- as.matrix(t(s$coeff[,1]))
X <- model.matrix.mvr(x)
U <- residuals(x)
Xfake <- X[,-25]
Xfake <- Xfake[,-24]
Xfake <- Xfake[,-17]
matrixVar <- inv(t(Xfake)%*%Xfake)%*%t(Xfake)%*%diag((U*sqrt((nrow(Xfake))/(nrow(Xfake)-(ncol(X)))))^2)%*%Xfake%*%inv(t(Xfake)%*%Xfake)
matrixVar <- as.matrix(matrixVar)
matrixsubBeta <- as.vector(matrixBeta[,2:ncol(matrixBeta)])
sims <- mvrnorm(n=1000, matrixsubBeta, matrixVar)
sims <- as.matrix(sims)
TP <- exp(-((sims[,1])/(2*(sims[,2]))))
c <- summary(TP)
coverage <- mean(TP>0 & TP<20)
output.s <- c(coverage, c[3])
output.s

dim(model.matrix.mvr(reg.KKM6))[1]  #number of observations

detach(campante82.Cook)
attach(campante82.No.CHE)


############################################################################
##REPLICATION Table 1B: Corruption (CPI) and Average Executive Tenure
##Cross sectional regression w/ Turning Point Monte Carlo Simulation
##Stability proxy: log executive tenure 20 years window


###############Table 1B (5) Full spec. (CPI)
##Obtain coefficients
reg.cpi <- lm(cpi0205~lavgten_exec8201+lavgten_exec8201sq+lwdigdppc8201+lwdigdppc8201sq+elf_eth+democ8201+imports8201+
   fuel8201+ores8201+pluralty8201+magn8201+pres8201+leg_british+leg_french+
   +leg_scandinavian+leg_german+leg_socialist+reg_eap+reg_eca+reg_mena+
   reg_we+reg_na+reg_ssa+reg_lac+reg_sa)
reg.cpi.s <- summary(reg.cpi)
reg.cpi.s

##Calculate huber-white robust standard error (using hc1 correction)
U <- residuals(reg.cpi)
X <- model.matrix.mvr(reg.cpi)
X <- cbind(1,X)
Xfake <- X[,-26]
Xfake <- Xfake[,-18]
a <- solve(t(Xfake)%*%Xfake)%*%t(Xfake)%*%diag((U*sqrt((nrow(Xfake))/(nrow(Xfake)-(ncol(Xfake)))))^2)%*%Xfake%*%solve(t(Xfake)%*%Xfake)
sqrt(diag(a))

##Turning Point-Monte Carlo Simulation
x <- reg.cpi
s <- reg.cpi.s
matrixBeta <- as.matrix(t(s$coeff[,1]))
X <- model.matrix.mvr(x)
U <- residuals(x)
Xfake <- X[,-25]
Xfake <- Xfake[,-17]
matrixVar <- inv(t(Xfake)%*%Xfake)%*%t(Xfake)%*%diag((U*sqrt((nrow(Xfake))/(nrow(Xfake)-(ncol(X)))))^2)%*%Xfake%*%inv(t(Xfake)%*%Xfake)
matrixVar <- as.matrix(matrixVar)
matrixsubBeta <- as.vector(matrixBeta[,2:ncol(matrixBeta)])
sims <- mvrnorm(n=1000, matrixsubBeta, matrixVar)
sims <- as.matrix(sims)
TP <- exp(-((sims[,1])/(2*(sims[,2]))))
c <- summary(TP)
coverage <- mean(TP>0 & TP<20)
output.s <- c(coverage, c[3])
output.s

dim(model.matrix.mvr(reg.cpi))[1]  #number of observations


###############Table 1B (5) Full spec. (ICRG)
##Obtain coefficients
reg.icrg <- lm(icrg0205~lavgten_exec8201+lavgten_exec8201sq+lwdigdppc8201+lwdigdppc8201sq+elf_eth+democ8201+imports8201+
  fuel8201+ores8201+pluralty8201+magn8201+pres8201+leg_british+leg_french+
  +leg_scandinavian+leg_german+leg_socialist+reg_eap+reg_eca+reg_mena+
  reg_we+reg_na+reg_ssa+reg_lac+reg_sa)
reg.icrg.s <- summary(reg.icrg)
reg.icrg.s

##Calculate huber-white robust standard error (using hc1 correction)
U <- residuals(reg.icrg)
X <- model.matrix.mvr(reg.icrg)
X <- cbind(1,X)
Xfake <- X[,-26]
Xfake <- Xfake[,-18]
a <- solve(t(Xfake)%*%Xfake)%*%t(Xfake)%*%diag((U*sqrt((nrow(Xfake))/(nrow(Xfake)-(ncol(Xfake)))))^2)%*%Xfake%*%solve(t(Xfake)%*%Xfake)
sqrt(diag(a))

##Turning Point-Monte Carlo Simulation
x <- reg.icrg
s <- reg.icrg.s
matrixBeta <- as.matrix(t(s$coeff[,1]))
X <- model.matrix.mvr(x)
U <- residuals(x)
Xfake <- X[,-25]
Xfake <- Xfake[,-17]
matrixVar <- inv(t(Xfake)%*%Xfake)%*%t(Xfake)%*%diag((U*sqrt((nrow(Xfake))/(nrow(Xfake)-(ncol(X)))))^2)%*%Xfake%*%inv(t(Xfake)%*%Xfake)
matrixVar <- as.matrix(matrixVar)
matrixsubBeta <- as.vector(matrixBeta[,2:ncol(matrixBeta)])
sims <- mvrnorm(n=1000, matrixsubBeta, matrixVar)
dim(matrixVar)
length(matrixsubBeta)
sims <- as.matrix(sims)
TP <- exp(-((sims[,1])/(2*(sims[,2]))))
c <- summary(TP)
coverage <- mean(TP>0 & TP<20)
output.s <- c(coverage, c[3])
output.s

dim(model.matrix.mvr(reg.icrg))[1]  #number of observations




############################################################################
##REPLICATION Table 5: Corruption (KKM) and Legislative Majority
##Cross sectional regression w/ Turning Point Monte Carlo Simulation
##Stability proxy: legislative majority is the share of seats

###############Load clean cross sectional data
campante<-read.dta("crosssection_Dec07.dta")
head(campante)
attach(campante)

##Subset the dataset for countries independent before 1982
campante82<-campante[indepdate<1982,]
length(campante82$indepdate)
dim(campante82)
attach(campante82)


###############Table 5 (1) Cross-section Ind. by 1982 (KKM)
##Obtain coefficients
reg.KKM.maj <- lm(kkm0205~ maj8201+maj8201sq+lwdigdppc8201+lwdigdppc8201sq+elf_eth+democ8201+imports8201+
   fuel8201+ores8201+pluralty8201+magn8201+pres8201+leg_british+leg_french+
   +leg_scandinavian+leg_german+leg_socialist+reg_eap+reg_eca+reg_mena+
   reg_we+reg_na+reg_ssa+reg_lac+reg_sa)
reg.KKM.maj.s <- summary(reg.KKM.maj)
reg.KKM.maj.s

##Calculate huber-white robust standard error (using hc1 correction)
U <- residuals(reg.KKM.maj)
X <- model.matrix.mvr(reg.KKM.maj) #need to be modified since it has problems of collinearity
X <- cbind(1,X)
Xfake <- X[,-26]
Xfake <- Xfake[,-18]
a <- solve(t(Xfake)%*%Xfake)%*%t(Xfake)%*%diag((U*sqrt((nrow(Xfake))/(nrow(Xfake)-(ncol(Xfake)))))^2)%*%Xfake%*%solve(t(Xfake)%*%Xfake)
sqrt(diag(a))  ##MAJORITY SQ DOES NOT MATCH ARTICLE BUT MATCHES LOG FILE from the do.file


##Turning Point-Monte Carlo Simulation
x <- reg.KKM.maj
s <- reg.KKM.maj.s
matrixBeta <- as.matrix(t(s$coeff[,1]))
X <- model.matrix.mvr(x)
Xfake <- X[,-25]
Xfake <- Xfake[,-17]
U <- residuals(x)
matrixVar <- inv(t(Xfake)%*%Xfake)%*%t(Xfake)%*%diag((U*sqrt((nrow(X))/(nrow(X)-(ncol(X)))))^2)%*%Xfake%*%inv(t(Xfake)%*%Xfake)
matrixVar <- as.matrix(matrixVar)#remember to change in sims too
matrixsubBeta <- as.vector(matrixBeta[,2:ncol(matrixBeta)])
sims <- mvrnorm(n=1000, matrixsubBeta, matrixVar)
#dim(matrixVar)
#length(matrixsubBeta)
sims <- as.matrix(sims)
TP <- (-((sims[,1])/(2*(sims[,2]))))
#a <-summary(sims[,1])
#b <-summary(sims[,2])
c <-summary(TP)
#d <-quantile(sims[,1],.10)
#e <-quantile(sims[,1],.90)
#f <-quantile(sims[,2],.10)
#g <-quantile(sims[,2],.90)
#h <-quantile(TP,.10)
#i <-quantile(TP,.90)
coverage <- mean(TP>0 & TP<1)
#output <-c(coverage,a,b,c)
output.s <- c(coverage, c[3])
output.s

dim(model.matrix.mvr(reg.KKM.maj))[1]  #number of observations (DOESN'T MATCH ARTICLE, MATCH DO FILE)


###############Table 5 (4) Cross-section Ind. by 1982 (CPI)
##Obtain coefficients
summary(cpi0205)
reg.cpi.maj <- lm(cpi0205~ maj8201+maj8201sq+lwdigdppc8201+lwdigdppc8201sq+elf_eth+democ8201+
  imports8201+fuel8201+ores8201+pluralty8201+magn8201+pres8201+
  leg_british+leg_french+leg_german+leg_socialist+leg_scandinavian+
  reg_eap+reg_eca+reg_mena+reg_we+reg_ssa+reg_lac+reg_sa+reg_na)
reg.cpi.maj.s <- summary(reg.cpi.maj)
reg.cpi.maj.s

##Calculate huber-white robust standard error (using hc1 correction)
U <- residuals(reg.cpi.maj)
X <- model.matrix.mvr(reg.cpi.maj)
X <- cbind(1,X)
Xfake <- X[,-26]
Xfake <- Xfake[,-18]
a <- solve(t(Xfake)%*%Xfake)%*%t(Xfake)%*%diag((U*sqrt((nrow(Xfake))/(nrow(Xfake)-(ncol(Xfake)))))^2)%*%Xfake%*%solve(t(Xfake)%*%Xfake)
sqrt(diag(a))


##Turning Point-Monte Carlo Simulation
x <- reg.cpi.maj
s <- reg.cpi.maj.s
matrixBeta <- as.matrix(t(s$coeff[,1]))
X <- model.matrix.mvr(x)
Xfake <- X[,-25]  #Take out one dummy variable trap, to avoid perfect multicollinearity
Xfake <- Xfake[,-17]  #Take out another dummy variable trap
U <- residuals(x)
matrixVar <- inv(t(Xfake)%*%Xfake)%*%t(Xfake)%*%diag((U*sqrt((nrow(X))/(nrow(X)-(ncol(X)))))^2)%*%Xfake%*%inv(t(Xfake)%*%Xfake)  #vcov w/ HWSE hc1
matrixVar <- as.matrix(matrixVar) #make sure vcov is a matrix
matrixsubBeta <- as.vector(matrixBeta[,2:ncol(matrixBeta)])  #leave out intercept in vec of betas
sims <- mvrnorm(n=1000, matrixsubBeta, matrixVar)  #simulate 1000 draws from MVN using subBeta and Var
sims <- as.matrix(sims)  #make sure it's a matrix
TP <- (-((sims[,1])/(2*(sims[,2]))))  #calculate turning pt, -b/2a from quadratic formula
c <-summary(TP)  #obtain summary of TP including median
coverage <- mean(TP>0 & TP<1)  #proportion of times TP falls into this interval
output.s <- c(coverage, c[3])  
output.s  #output coverage and median of TP

dim(model.matrix.mvr(reg.cpi.maj))[1]  #number of observations


###############Table 5 (7) Cross-section Ind. by 1982 (ICRG)
##Obtain coefficients
reg.icrg.maj <- lm(icrg0205~ maj8201+maj8201sq+lwdigdppc8201+lwdigdppc8201sq+elf_eth+democ8201+
  imports8201+fuel8201+ores8201+pluralty8201+magn8201+pres8201+
  leg_british+leg_french+leg_scandinavian+leg_german+leg_socialist+
  reg_eap+reg_eca+reg_mena+reg_we+reg_na+reg_ssa+reg_lac+reg_sa,model=TRUE)
reg.icrg.maj.s <- summary(reg.icrg.maj)
reg.icrg.maj.s

##Calculate huber-white robust standard error (using hc1 correction)
U <- residuals(reg.icrg.maj)
X <- model.matrix.mvr(reg.icrg.maj)
X <- cbind(1,X)
Xfake <- X[,-26]
Xfake <- Xfake[,-18]
a <- solve(t(Xfake)%*%Xfake)%*%t(Xfake)%*%diag((U*sqrt((nrow(Xfake))/(nrow(Xfake)-(ncol(Xfake)))))^2)%*%Xfake%*%solve(t(Xfake)%*%Xfake)
sqrt(diag(a))


##Turning Point-Monte Carlo Simulation
x <- reg.icrg.maj
s <- reg.icrg.maj.s
matrixBeta <- as.matrix(t(s$coeff[,1]))
X <- model.matrix.mvr(x)
Xfake <- X[,-25]
Xfake <- Xfake[,-17]
U <- residuals(x)
matrixVar <- inv(t(Xfake)%*%Xfake)%*%t(Xfake)%*%diag((U*sqrt((nrow(X))/(nrow(X)-(ncol(X)))))^2)%*%Xfake%*%inv(t(Xfake)%*%Xfake)
matrixVar <- as.matrix(matrixVar)#remember to change in sims too
matrixsubBeta <- as.vector(matrixBeta[,2:ncol(matrixBeta)])
sims <- mvrnorm(n=1000, matrixsubBeta, matrixVar)
sims <- as.matrix(sims)
TP <- (-((sims[,1])/(2*(sims[,2]))))
c <-summary(TP)
coverage <- mean(TP>0 & TP<1)
output.s <- c(coverage, c[3])
output.s

dim(model.matrix.mvr(reg.icrg.maj))[1]  #number of observations

